@@ -81,6 +81,7 @@ gem 'string-scrub' # for ruby <2.1 |
||
| 81 | 81 |
gem 'therubyracer', '~> 0.12.1' |
| 82 | 82 |
gem 'typhoeus', '~> 0.6.3' |
| 83 | 83 |
gem 'uglifier', '>= 1.3.0' |
| 84 |
+gem 'hypdf', '~> 1.0.7' |
|
| 84 | 85 |
|
| 85 | 86 |
group :development do |
| 86 | 87 |
gem 'better_errors', '~> 1.1' |
@@ -174,12 +174,17 @@ GEM |
||
| 174 | 174 |
hipchat (1.2.0) |
| 175 | 175 |
httparty |
| 176 | 176 |
hpricot (0.8.6) |
| 177 |
+ httmultiparty (0.3.10) |
|
| 178 |
+ httparty (>= 0.7.3) |
|
| 179 |
+ multipart-post |
|
| 177 | 180 |
http (0.5.1) |
| 178 | 181 |
http_parser.rb |
| 179 | 182 |
http_parser.rb (0.6.0) |
| 180 | 183 |
httparty (0.13.1) |
| 181 | 184 |
json (~> 1.8) |
| 182 | 185 |
multi_xml (>= 0.5.2) |
| 186 |
+ hypdf (1.0.7) |
|
| 187 |
+ httmultiparty (= 0.3.10) |
|
| 183 | 188 |
i18n (0.6.11) |
| 184 | 189 |
jquery-rails (3.1.1) |
| 185 | 190 |
railties (>= 3.0, < 5.0) |
@@ -475,6 +480,7 @@ DEPENDENCIES |
||
| 475 | 480 |
guard-rspec |
| 476 | 481 |
hipchat (~> 1.2.0) |
| 477 | 482 |
httparty (~> 0.13) |
| 483 |
+ hypdf (~> 1.0.7) |
|
| 478 | 484 |
jquery-rails (~> 3.1.0) |
| 479 | 485 |
json (~> 1.8.1) |
| 480 | 486 |
jsonpath (~> 0.5.6) |
@@ -0,0 +1,62 @@ |
||
| 1 |
+require 'open-uri' |
|
| 2 |
+require 'hypdf' |
|
| 3 |
+ |
|
| 4 |
module Agents
  # Agent that reacts to incoming events carrying a `url` key, downloads the
  # document at that URL and emits a new event containing the result of
  # HyPDF's `pdfinfo` call merged with the incoming payload.
  class PdfInfoAgent < Agent

    gem_dependency_check { defined?(HyPDF) }

    cannot_be_scheduled!

    description <<-MD
      In order for this agent to work, you need to have [HyPDF](https://devcenter.heroku.com/articles/hypdf) running and configured.

      It works by acting on events that contain a key `url` in their payload, and runs the [pdfinfo](https://devcenter.heroku.com/articles/hypdf#pdfinfo) command on them.
    MD

    event_description <<-MD
    This will change based on the metadata in the pdf.

        { "Title"=>"Everyday Rails Testing with RSpec",
          "Author"=>"Aaron Sumner",
          "Creator"=>"LaTeX with hyperref package",
          "Producer"=>"xdvipdfmx (0.7.8)",
          "CreationDate"=>"Fri Aug 2 05",
          "32"=>"50 2013",
          "Tagged"=>"no",
          "Pages"=>"150",
          "Encrypted"=>"no",
          "Page size"=>"612 x 792 pts (letter)",
          "Optimized"=>"no",
          "PDF version"=>"1.5",
          "url": "your url"
        }
    MD

    # The agent is considered healthy as long as it has no recent error logs.
    def working?
      !recent_error_logs?
    end

    # This agent takes no options.
    def default_options
      {}
    end

    # Processes each incoming event whose payload has an http(s) `url`.
    #
    # The pattern is anchored with \A rather than ^: ^ matches at the start of
    # any line, so a multi-line value such as "| cmd\nhttp://x" would otherwise
    # pass validation.
    def receive(incoming_events)
      incoming_events.each do |event|
        interpolate_with(event) do
          url_to_scrape = event.payload['url']
          check_url(url_to_scrape, event.payload) if url_to_scrape =~ %r{\Ahttps?://}i
        end
      end
    end

    # Fetches every URL in +in_url+ (a single URL or a list), runs
    # HyPDF.pdfinfo on the downloaded document and creates one event per URL.
    # Keys in +payload+ take precedence over keys returned by pdfinfo.
    def check_url(in_url, payload)
      return unless in_url.present?
      Array(in_url).each do |url|
        log "Fetching #{url}"
        # URI#open (from open-uri) is only defined for remote URIs, so unlike
        # Kernel#open it can never spawn a subprocess ("| cmd") or read a local
        # path. The block form closes the downloaded IO once pdfinfo is done.
        info = URI.parse(url).open { |pdf| HyPDF.pdfinfo(pdf) }
        create_event :payload => info.merge(payload)
      end
    end
  end
end